
# Module-level file handles; every file is treated as UTF-8 text.
#   "r+" opens an existing file for reading and writing and raises if the
#        file is missing.
#   "w+" creates the file (or truncates an existing one to empty) and then
#        allows reading and writing.
# Only `wordfile` (the word list) and `pdfHaveWordLines` (the match output)
# are used by the visible part of this script.
pdft = open(
    r"E:\project_python\NewN\NLP\Reader\pdftxt.txt",
    mode="r+",
    encoding="UTF-8",
)
wordfile = open("2w.txt", mode="r+", encoding="UTF-8")
resfile = open("11.txt", mode="r+", encoding="UTF-8")
pdft2 = open("pdftxt2.txt", mode="w+", encoding="UTF-8")
pdfHaveWordLines = open("pdfHaveWord.txt", mode="w+", encoding="UTF-8")

# for word in wordfile.readlines():
#     word = word.strip()
#     print(word)
# Collect every non-blank line (with surrounding whitespace stripped) from
# the numbered input files "1 (1).txt", "1 (2).txt", ... into one list.
lines = []
for i in range(1, 39):
    # NOTE(review): the original comment said "read 39 files", but
    # range(1, 39) only covers files 1 through 38 -- confirm the intended count.
    string = r"C:\Users\Lenovo\Desktop\txt\1 (" + str(i) + ")" + ".txt"
    print("正在读第"+str(i)+"个文件")
    # The files are only read, so plain "r" is enough (no write permission
    # needed), and the context manager closes each handle as soon as the
    # file has been consumed instead of leaking it.
    with open(string, "r", encoding="UTF-8") as file:
        for line in file:
            # The previous check compared against the raw string r"\n"
            # (a backslash followed by "n"), which never equals a real
            # newline, so blank lines were never filtered out. Strip first
            # and keep the line only if something is left.
            line = line.strip()
            if line:
                lines.append(line)
# Report how many non-blank lines were collected in total.
print(len(lines))


# Find every collected line that contains a word from the word file.
# Each (word, line) match is echoed to stdout, the line is written to
# pdfHaveWordLines and appended to haveWordLines, so a line containing
# several listed words is recorded once per matching word.
haveWordLines = []
for word in wordfile:
    word = word.strip()
    if not word:
        # An empty string is a substring of every string, so a blank line
        # in the word file would otherwise report every line as a match.
        continue
    for line in lines:
        if word in line:
            print(word+ "    " + line)
            # Persist the matching line to the output file.
            print(line, file=pdfHaveWordLines)
            haveWordLines.append(line)
# The output handle is never closed by this script; flush so the matches
# written above actually reach the file.
pdfHaveWordLines.flush()
# Report the total number of recorded matches.
print(len(haveWordLines))
